human

splicing <- c("SE","MXE","RI","A3SS","A5SS")
all_psi <- c()
splice_type <- c()

human <- read.delim("~/Desktop/MISO_proj/EuropeanDataset/data/Sample_Metadata_Human_Heart-Brain-Only.tsv",sep="\t",header = T,stringsAsFactors = F)
human <- human[order(human$Source.Name),]

# Color bars for heatmaps
heart.samps <- human$Source.Name[which(human$Characteristics.organism.part.=="heart")]
brain.samps <- human$Source.Name[which(human$Characteristics.organism.part.!="heart")]

half <- floor(nrow(human)/2)

# # Combine the matrices into one matrix
# for (splice in splicing){
#     x <- read.csv(paste("/Users/alisha/Desktop/faster_Datastore/Alisha/MISO_project/EuropeanDataset/human/cohort_level/miso_counts_",splice,"_mat.csv",sep=""),header=T,row.names = 1,check.names = F)
#     colnames(x) <- sub(".counts.txt","",colnames(x))
#     x <- x[,order(colnames(x))]
#     y <- apply(x,1,function(row){table(is.na(row))["FALSE"] >= half})
#     x <- x[y==T,]
#     splice_type <- c(splice_type,rep(splice,nrow(x)))
#     all_psi <- rbind(all_psi,data.frame(x,SpliceType=splice))
# }
# 
# save(all_psi,file="~/Desktop/MISO_proj/EuropeanDataset/data/AllPSI_human.Rdata")

load("~/Desktop/MISO_proj/EuropeanDataset/data/AllPSI_human.Rdata")

Heart

heart <- data.frame(Samples = human$Source.Name[which(human$Characteristics.organism.part.=="heart")], DevelopStage = human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="heart")],Group <- human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="heart")])

heart$Group[which(heart$DevelopStage == "19 week post conception")] <- "Fetus"
heart$Group[which(heart$DevelopStage == "13 week post conception" | heart$DevelopStage == "16 week post conception" | heart$DevelopStage == "18 week post conception")] <- "VeryLate_Embryo"
heart$Group[which(heart$DevelopStage == "11 week post conception" | heart$DevelopStage == "12 week post conception")] <- "Late_Embryo"
heart$Group[which(heart$DevelopStage == "9 week post conception" | heart$DevelopStage == "10 week post conception")] <- "Midstage_Embryo"
heart$Group[which(heart$DevelopStage == "8 week post conception")] <- "Developing_Embryo"
heart$Group[which(heart$DevelopStage == "7 week post conception")] <- "Early_Embryo"
heart$Group[which(heart$DevelopStage == "4 week post conception" | heart$DevelopStage == "5 week post conception" | heart$DevelopStage == "6 week post conception")] <- "VeryEarly_Embryo"


# table(heart$DevelopStage)
table(heart$Group)
## 
##        adolescent Developing_Embryo      Early_Embryo             Fetus 
##                 1                 5                 3                 3 
##            infant       Late_Embryo      middle adult   Midstage_Embryo 
##                 4                 6                 1                 7 
##           neonate           toddler  VeryEarly_Embryo   VeryLate_Embryo 
##                 3                 2                 6                 8 
##       young adult 
##                 1
colnames(all_psi) <- sub("^X","",colnames(all_psi))
heart.psi <- all_psi[,heart.samps]
heart.psi.complete <- heart.psi[complete.cases(heart.psi),]
heart.psi.complete <- heart.psi.complete[,order(colnames(heart.psi.complete))]
heart.psi.100 <- heart.psi.complete * 100 # turn psi values into integers
heart.psi.100 <- round(heart.psi.100,0)

# dim(heart.psi.100)
design <- model.matrix(~heart$Group)
y = DGEList(counts=heart.psi.100, group = heart$Group)
y <- estimateDisp(y,design)
fit <- glmQLFit(y, design)
fit <- glmQLFTest(fit, coef=2:7)
tab <- as.data.frame(topTags(fit, n=300))
# translate <- c()
# 
# for(splice in splicing){
#     tr <- as.data.frame(fread(paste("/Users/alisha/Desktop/faster_Datastore/Alisha/MISO_project/data/human/", splice, ".hg38lnc.gff3.expanded_attr.csv",sep=""),header=T,stringsAsFactors = F))
#   tr <- tr[which(tr$TranscriptType=="gene"),]
#   tr <- tr[,c("TranscriptType","GeneSymbol","ID")]
# 
#   translate <- rbind(translate,tr)
# }
# head(translate)
# write.table(translate,file="/Users/alisha/Desktop/faster_Datastore/Alisha/MISO_project/data/human/allSplice.hg38lnc.gff3.truncated.tsv",sep="\t",quote=F,row.names = F)

translate <- as.data.frame(fread("/Users/alisha/Desktop/faster_Datastore/Alisha/MISO_project/data/human/allSplice.hg38lnc.gff3.truncated.tsv"))
tab$Gene <- sapply(rownames(tab),function(name){translate$GeneSymbol[which(translate$ID==name)]})

# write.table(tab,file="/Users/alisha/Desktop/MISO_proj/EuropeanDataset/human_Heart_DEisoforms.tsv",sep="\t",col.names = NA)
# kable(head(tab)) %>% kable_styling()
top5 <- c("chr1:93339357:93339536:-@chr1:93337377:93339274:-@chr1:93311490:93311608:-@chr1:93305388:93305470:-",
          "chr1:93339357:93339536:-@chr1:93338230:93339274:-@chr1:93311490:93311608:-@chr1:93305388:93305470:-",
          "chr1:93339357:93339536:-@chr1:93338230:93339274:-@chr1:93305388:93305470:-",
          "chr15:41892777:41893078:+@chr15:41893756|41895584:41897521:+",
          "chr1:93339357:93339536:-@chr1:93337377:93339274:-@chr1:93305388:93305470:-")
    
top5 <- as.data.frame(heart.psi.complete[top5,])
top5 <- merge(top5,translate,by.x=0,by.y=3, all.x = T, all.y=F)
heart$Group <- factor(heart$Group, levels=c("VeryEarly_Embryo",
                                            "Early_Embryo",
                                            "Developing_Embryo",
                                            "Midstage_Embryo",
                                            "Late_Embryo",
                                            "VeryLate_Embryo",
                                            "Fetus",
                                            "neonate",
                                            "infant",
                                            "toddler",
                                            # "school age child",
                                            "adolescent",
                                            "young adult",
                                            "middle adult"))
                                           # "elderly"))

top5.melt <- reshape2::melt(top5)
top5.melt <- merge(top5.melt,heart,by.x=4,by.y = 1)

Top 5 Most Significantly DE Isoforms

ggplot(top5.melt,aes(x=Group, y=value,fill=Group)) + geom_boxplot() + ylab("PSI") + 
    facet_wrap(~Row.names) + theme_bw() + 
    theme(axis.text.x = element_text(color = "black",size = 14, angle = 90, hjust = 1,vjust = 0.5), 
          strip.text.x = element_text(color="black",face = "bold",size=7))

Combining young adult and middle adult, and removing adolescent

heart <- data.frame(Samples = human$Source.Name[which(human$Characteristics.organism.part.=="heart")], DevelopStage = human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="heart")],Group <- human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="heart")])

heart$Group[which(heart$DevelopStage == "19 week post conception")] <- "Fetus"
heart$Group[which(heart$DevelopStage == "13 week post conception" | heart$DevelopStage == "16 week post conception" | heart$DevelopStage == "18 week post conception")] <- "VeryLate_Embryo"
heart$Group[which(heart$DevelopStage == "11 week post conception" | heart$DevelopStage == "12 week post conception")] <- "Late_Embryo"
heart$Group[which(heart$DevelopStage == "9 week post conception" | heart$DevelopStage == "10 week post conception")] <- "Midstage_Embryo"
heart$Group[which(heart$DevelopStage == "8 week post conception")] <- "Developing_Embryo"
heart$Group[which(heart$DevelopStage == "7 week post conception")] <- "Early_Embryo"
heart$Group[which(heart$DevelopStage == "4 week post conception" | heart$DevelopStage == "5 week post conception" | heart$DevelopStage == "6 week post conception")] <- "VeryEarly_Embryo"

heart$Group[which(heart$DevelopStage == "middle adult" | heart$DevelopStage == "young adult" | heart$DevelopStage == "adolescent")] <- "Adult"
heart$Group[which(heart$DevelopStage == "toddler")] <- "infant"
            
# heart <- heart[-which(heart$DevelopStage == "adolescent"),]
heart.samps <- heart$Samples

table(heart$Group)
## 
##             Adult Developing_Embryo      Early_Embryo             Fetus 
##                 3                 5                 3                 3 
##            infant       Late_Embryo   Midstage_Embryo           neonate 
##                 6                 6                 7                 3 
##  VeryEarly_Embryo   VeryLate_Embryo 
##                 6                 8
heart.psi <- all_psi[,heart.samps]
heart.psi.complete <- heart.psi[complete.cases(heart.psi),]
heart.psi.complete <- heart.psi.complete[,order(colnames(heart.psi.complete))]
heart.psi.100 <- heart.psi.complete * 100 # turn psi values into integers
heart.psi.100 <- round(heart.psi.100,0)
design <- model.matrix(~heart$Group)
y = DGEList(counts=heart.psi.100, group = heart$Group)
y <- estimateDisp(y,design)
fit <- glmQLFit(y, design)
fit <- glmQLFTest(fit, coef=2:7)
tab <- as.data.frame(topTags(fit, n=300))
tab$Gene <- sapply(rownames(tab),function(name){translate$GeneSymbol[which(translate$ID==name)]})

# write.table(tab,file="/Users/alisha/Desktop/MISO_proj/EuropeanDataset/human_Heart_DEisoforms.tsv",sep="\t",col.names = NA)
kable(head(tab)) %>% kable_styling()
logFC.heart.GroupDeveloping_Embryo logFC.heart.GroupEarly_Embryo logFC.heart.GroupFetus logFC.heart.Groupinfant logFC.heart.GroupLate_Embryo logFC.heart.GroupMidstage_Embryo logCPM F PValue FDR Gene
chr15:41892777:41893078:+@chr15:41893756|41895584:41897521:+ -4.389741 -5.046646 -2.354436 -0.0135916 -3.383262 -3.582378 8.023539 33.71331 0 0 AC020659.1
chr15:41892777:41893078:+@chr15:41893756|41895580:41897521:+ -4.246148 -4.968700 -2.303531 -0.0254198 -3.306240 -3.560064 7.961190 31.76561 0 0 AC020659.1
chr15:41892777:41893078:+@chr15:41893742|41895584:41897521:+ -4.318738 -5.042154 -2.295809 -0.0165543 -3.379134 -3.523287 8.024377 31.20384 0 0 AC020659.1
chr15:41892777:41893078:+@chr15:41893742|41895580:41897521:+ -4.251849 -4.812156 -2.255668 -0.0369236 -3.366362 -3.484122 7.965296 29.89463 0 0 AC020659.1
chr15:41892777:41893078:+@chr15:41893742|41895523:41897521:+ -4.167608 -4.730326 -2.173184 0.0837477 -3.403682 -3.402290 7.967010 27.61017 0 0 AC020659.1
chr15:41892777:41893078:+@chr15:41893756|41895523:41897521:+ -4.155502 -4.571593 -2.215309 0.0989547 -3.332308 -3.390214 7.957654 27.46999 0 0 AC020659.1
top5 <- c("chr15:41892777:41893078:+@chr15:41893756|41895584:41897521:+",
          "chr15:41892777:41893078:+@chr15:41893756|41895580:41897521:+",
          "chr15:41892777:41893078:+@chr15:41893742|41895584:41897521:+",
          "chr15:41892777:41893078:+@chr15:41893742|41895580:41897521:+",
          "chr15:41892777:41893078:+@chr15:41893742|41895523:41897521:+")
    
top5 <- as.data.frame(heart.psi.complete[top5,])
top5 <- merge(top5,translate,by.x=0,by.y=3, all.x = T, all.y=F)


heart$Group <- factor(heart$Group, levels=c("VeryEarly_Embryo",
                                            "Early_Embryo",
                                            "Developing_Embryo",
                                            "Midstage_Embryo",
                                            "Late_Embryo",
                                            "VeryLate_Embryo",
                                            "Fetus",
                                            "neonate",
                                            "infant",
                                            "toddler",
                                            "Adult"))

top5.melt <- reshape2::melt(top5)
top5.melt <- merge(top5.melt,heart,by.x=4,by.y = 1)
ggplot(top5.melt,aes(x=Group, y=value,fill=Group)) + geom_boxplot() + ylab("PSI") + 
    facet_wrap(~Row.names) + theme_bw() + 
    theme(axis.text.x = element_text(color = "black",size = 14, angle = 90, hjust = 1,vjust = 0.5), 
          strip.text.x = element_text(color="black",face = "bold",size=7))

new <- heart.psi.complete[rownames(tab)[which(tab$FDR<0.05)],]

rownames(new) <- sapply(rownames(new),function(name){paste(translate$GeneSymbol[which(translate$ID==name)],name,sep="_")})


new$transcripts <- rownames(new)
new.melt <- melt(new)
new.melt <- merge(new.melt,heart,by.x=2,by.y=1)

# head(new.melt)
transcripts <- unique(new.melt$transcripts)

pdf("/Users/alisha/Desktop/MISO_proj/EuropeanDataset/human_Heart_DEisoforms_Boxplots.pdf",width = 9,height = 7)
for (trans in transcripts){
  df <- new.melt[which(new.melt$transcripts==trans),]
  
  print(ggplot(df,aes(x=Group, y=value,fill=Group)) + geom_boxplot() + ylab("PSI") +  theme_bw() + 
    ggtitle(paste(str_wrap(trans, width = 75))) + xlab("") + 
    theme(axis.text.x = element_text(color = "black",size = 14, angle = 90, hjust = 1,vjust = 0.5),
          plot.title = element_text(hjust=0.5,size = 12), legend.position = "none") )
}

dev.off()
heart <- heart[order(heart$Group),]

ra <- HeatmapAnnotation(Group = heart$Group,col = list(Group=c("VeryEarly_Embryo"= "purple",
                                                               "Adult" = "coral3", 
                                                               "Midstage_Embryo"="blue",
                                                               "Early_Embryo" = "yellow",
                                                               "Late_Embryo" = "brown", 
                                                               "VeryLate_Embryo"= "pink",
                                                               # "Adolescent"= "cyan3",
                                                               "Developing_Embryo"="deeppink1",
                                                               "Fetus" = "goldenrod",
                                                               "neonate" = "cyan3",
                                                               "infant" = "green")))
                                                               # "toddler" = "midnightblue")))

#ha <- HeatmapAnnotation(SpliceType = splice_type, col=list(SpliceType=c("SE"="red","MXE"="blue","RI"="cyan3","A3SS"="yellow","A5SS"="orange")))

lt05 <- rownames(tab)[which(tab$FDR < 0.05)]

toplot <- heart.psi.complete[lt05,]
toplot <- medianCtr(toplot)

toplot <- toplot[,heart$Samples]

draw(Heatmap(as.matrix(toplot), cluster_columns = F, name = "PSI",column_title = "Top DE Transcripts (FDR < 0.05)\nhuman Heart Samples", clustering_distance_columns = "pearson",clustering_method_columns = "average", bottom_annotation = ra, row_names_max_width = max_text_width(rownames(toplot), gp = gpar(fontsize = 3))),heatmap_legend_side = "left", annotation_legend_side = "left")

tog <- t(heart.psi.complete)
tog <- tog[order(rownames(tog)),]

heart <- heart[order(heart$Samples),]
# identical(heart$Samples,rownames(tog))



anova.res <- as.data.frame(apply(tog,2,function(col){summary(aov(col~heart$Group))[[1]][["Pr(>F)"]][1]}))
colnames(anova.res) <- "Pvalue"
anova.res$FDR <- p.adjust(anova.res$Pvalue,method = "fdr")
head(anova.res)
##                                                                                           Pvalue
## chr10:100231015:100231126:+@chr10:100231246:100231660:+@chr10:100233231:100233443:+ 0.0005203386
## chr10:100373544:100374012:+@chr10:100380895:100381109:+@chr10:100381216:100381448:+ 0.5363823723
## chr10:100373544:100374012:+@chr10:100380983:100381109:+@chr10:100381216:100381448:+ 0.6594444183
## chr10:100373568:100374080:+@chr10:100380983:100381109:+@chr10:100381216:100381448:+ 0.4545368180
## chr10:102450187:102450330:+@chr10:102451399:102452187:+@chr10:102455337:102456297:+ 0.9751767854
## chr10:102450187:102450360:+@chr10:102451399:102452187:+@chr10:102455337:102456297:+ 0.9376430047
##                                                                                            FDR
## chr10:100231015:100231126:+@chr10:100231246:100231660:+@chr10:100233231:100233443:+ 0.00790543
## chr10:100373544:100374012:+@chr10:100380895:100381109:+@chr10:100381216:100381448:+ 0.68021411
## chr10:100373544:100374012:+@chr10:100380983:100381109:+@chr10:100381216:100381448:+ 0.77250427
## chr10:100373568:100374080:+@chr10:100380983:100381109:+@chr10:100381216:100381448:+ 0.62031948
## chr10:102450187:102450330:+@chr10:102451399:102452187:+@chr10:102455337:102456297:+ 0.97793542
## chr10:102450187:102450360:+@chr10:102451399:102452187:+@chr10:102455337:102456297:+ 0.95151082
anova.sig <- anova.res[which(anova.res$FDR<0.05),]
cat("Number of significant transcripts by anova test: ",nrow(anova.sig))
## Number of significant transcripts by anova test:  320
tab.sig <- tab[which(tab$FDR<0.05),]
cat("\nNumber of significant transcripts by edgeR quasi-likelihood test: ",nrow(tab.sig))
## 
## Number of significant transcripts by edgeR quasi-likelihood test:  210
sig.trans <- rownames(anova.sig)[rownames(anova.sig) %in% rownames(tab.sig)]
cat("\nNumber of transcripts shared between anova and edgeR: ",length(sig.trans))
## 
## Number of transcripts shared between anova and edgeR:  174
grid.newpage()
venn <- draw.pairwise.venn(area1= nrow(anova.sig),
                           area2= nrow(tab.sig),
                           cross.area = length(sig.trans),
                           category   = c("Anova", "EdgeR"))
grid.draw(venn)

human Brain

Forebrain

# splicing <- c("SE","MXE","RI","A3SS","A5SS")

# splice_type <- c()

# human <- read.delim("~/Desktop/MISO_proj/EuropeanDataset/data/Sample_Metadata_human_Heart-Brain-Only.tsv",sep="\t",header = T,stringsAsFactors = F)
# human <- human[order(human$Source.Name),]


fore.samps <- human$Source.Name[which(human$Characteristics.organism.part.=="forebrain")]



# Combine the matrices into one matrix
# for (splice in splicing){
#     x <- read.csv(paste("/Users/alisha/Desktop/faster_Datastore/Alisha/MISO_project/EuropeanDataset/human/cohort_level/miso_counts_",splice,"_mat.csv",sep=""),header=T,row.names = 1,check.names = F)
#     colnames(x) <- sub(".counts.txt","",colnames(x))
#     x <- x[,order(colnames(x))]
#     y <- apply(x,1,function(row){table(is.na(row))["FALSE"] >= half})
#     x <- x[y==T,]
#     splice_type <- c(splice_type,rep(splice,nrow(x)))
#     all_psi <- rbind(all_psi,data.frame(x,SpliceType=splice))
#     
# }
fbrain <- data.frame(Samples = human$Source.Name[which(human$Characteristics.organism.part.=="forebrain")], DevelopStage = human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="forebrain")],Group <- human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="forebrain")])

fbrain$Group[which(fbrain$DevelopStage == "19 week post conception")] <- "Fetus"
fbrain$Group[which(fbrain$DevelopStage == "13 week post conception" | fbrain$DevelopStage == "16 week post conception" | fbrain$DevelopStage == "18 week post conception")] <- "VeryLate_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "11 week post conception" | fbrain$DevelopStage == "12 week post conception")] <- "Late_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "9 week post conception" | fbrain$DevelopStage == "10 week post conception")] <- "Midstage_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "8 week post conception")] <- "Developing_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "7 week post conception")] <- "Early_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "4 week post conception" | fbrain$DevelopStage == "5 week post conception" | fbrain$DevelopStage == "6 week post conception")] <- "VeryEarly_Embryo"

# fbrain$Group[which(fbrain$DevelopStage == "middle adult" | fbrain$DevelopStage == "young adult")] <- "Adult"

# fbrain <- fbrain[-which(fbrain$DevelopStage == "adolescent"),]

# table(fbrain$DevelopStage)
table(fbrain$Group)
## 
##        adolescent Developing_Embryo      Early_Embryo           elderly 
##                 4                 4                 3                 2 
##             Fetus            infant       Late_Embryo      middle adult 
##                 5                 2                 5                 2 
##   Midstage_Embryo           neonate  school age child           toddler 
##                 4                 3                 2                 3 
##  VeryEarly_Embryo   VeryLate_Embryo       young adult 
##                 5                 6                 5
fbrain.psi <- all_psi[,fore.samps]
fbrain.psi.complete <- fbrain.psi[complete.cases(fbrain.psi),]
fbrain.psi.complete <- fbrain.psi.complete[,order(colnames(fbrain.psi.complete))]
fbrain.psi.100 <- fbrain.psi.complete * 100 # turn psi values into integers
fbrain.psi.100 <- round(fbrain.psi.100,0)
design <- model.matrix(~fbrain$Group)
y = DGEList(counts=fbrain.psi.100, group = fbrain$Group)
y <- estimateDisp(y,design)
fit <- glmQLFit(y, design)
fit <- glmQLFTest(fit, coef=2:6)
tab <- as.data.frame(topTags(fit, n=450))

tab$Gene <- sapply(rownames(tab),function(name){translate$GeneSymbol[which(translate$ID==name)]})

# write.table(tab, file = "/Users/alisha/Desktop/MISO_proj/EuropeanDataset/human_Forebrain_DEisoforms.tsv",sep="\t",col.names = NA)
kable(head(tab)) %>% kable_styling()
logFC.fbrain.GroupDeveloping_Embryo logFC.fbrain.GroupEarly_Embryo logFC.fbrain.Groupelderly logFC.fbrain.GroupFetus logFC.fbrain.Groupinfant logCPM F PValue FDR Gene
chr5:88268899:88269027:+@chr5:88269468|88270526:88270585:+ 2.913442 2.646533 -0.1412662 1.922600 -0.6306499 7.842873 24.30047 0 0.0e+00 TMEM161B-AS1
chr1:87133614:87133718:+@chr1:87165389:87165981:+@chr1:87168136:87169198:+ 3.651425 3.581139 -0.4012360 3.600045 0.8077651 7.518189 21.29206 0 1.0e-07 AC093155.3
chr2:5968586:5969238:+@chr2:5969330:5969486:+@chr2:5982773:5982857:+@chr2:6000983:6003510:+ -1.710955 -2.843688 -0.9209217 -0.420355 -0.3024015 8.628008 19.23880 0 2.0e-07 SILC1
chr14:100834632-100834751:+@chr14:100835389-100835549:+ 1.913827 1.961178 0.5739953 1.544119 0.1014239 8.838687 17.90957 0 4.0e-07 MEG3
chr14:100834632-100834751:+@chr14:100835370-100835549:+ 1.792982 1.855878 0.5318384 1.368526 0.1323979 8.847548 16.83574 0 8.0e-07 MEG3
chr14:100834632-100834751:+@chr14:100835293-100835549:+ 1.466886 1.495520 0.2660893 1.245227 0.1109822 8.936573 15.67299 0 1.9e-06 MEG3
# translate <- as.data.frame(fread("/Users/alisha/Desktop/faster_Datastore/Alisha/MISO_project/data/human/allSplice.mm10lnc.gff3.truncated.tsv"))
top5 <- c("chr5:88268899:88269027:+@chr5:88269468|88270526:88270585:+",
          "chr1:87133614:87133718:+@chr1:87165389:87165981:+@chr1:87168136:87169198:+",
          "chr2:5968586:5969238:+@chr2:5969330:5969486:+@chr2:5982773:5982857:+@chr2:6000983:6003510:+",
          "chr14:100834632-100834751:+@chr14:100835389-100835549:+",
          "chr14:100834632-100834751:+@chr14:100835370-100835549:+")
    
top5 <- as.data.frame(fbrain.psi.complete[top5,])
top5 <- merge(top5,translate,by.x=0,by.y=3, all.x = T, all.y=F)
fbrain$Group <- factor(fbrain$Group, levels=c("VeryEarly_Embryo",
                                              "Early_Embryo",
                                              "Developing_Embryo",
                                              "Midstage_Embryo",
                                              "Late_Embryo",
                                              "VeryLate_Embryo",
                                              "Fetus",
                                              "neonate",
                                              "infant",
                                              "toddler",
                                              "school age child",
                                              "adolescent",
                                              "young adult",
                                              "middle adult",
                                              "elderly"))

top5.melt <- reshape2::melt(top5)
top5.melt <- merge(top5.melt,fbrain,by.x=4,by.y = 1)

Top 5 Most Significantly DE Isoforms

ggplot(top5.melt,aes(x=Group, y=value,fill=Group)) + geom_boxplot() + ylab("PSI") + 
    facet_wrap(~Row.names) + theme_bw() + 
    theme(axis.text.x = element_text(color = "black",size = 14, angle = 90, hjust = 1,vjust = 0.5), 
          strip.text.x = element_text(color="black",face = "bold",size=7))

fbrain <- data.frame(Samples = human$Source.Name[which(human$Characteristics.organism.part.=="forebrain")], DevelopStage = human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="forebrain")],Group <- human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="forebrain")])

fbrain$Group[which(fbrain$DevelopStage == "19 week post conception")] <- "Fetus"
fbrain$Group[which(fbrain$DevelopStage == "13 week post conception" | fbrain$DevelopStage == "16 week post conception" | fbrain$DevelopStage == "18 week post conception")] <- "VeryLate_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "11 week post conception" | fbrain$DevelopStage == "12 week post conception")] <- "Late_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "9 week post conception" | fbrain$DevelopStage == "10 week post conception")] <- "Midstage_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "8 week post conception")] <- "Developing_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "7 week post conception")] <- "Early_Embryo"
fbrain$Group[which(fbrain$DevelopStage == "4 week post conception" | fbrain$DevelopStage == "5 week post conception" | fbrain$DevelopStage == "6 week post conception")] <- "VeryEarly_Embryo"

fbrain$Group[which(fbrain$DevelopStage == "infant")] <- "toddler"
fbrain$Group[which(fbrain$DevelopStage == "school age child")] <- "adolescent"

fbrain$Group[which(fbrain$DevelopStage == "middle adult" | fbrain$DevelopStage == "young adult")] <- "Adult"

# fbrain <- fbrain[-which(fbrain$DevelopStage == "adolescent"),]

# table(fbrain$DevelopStage)
table(fbrain$Group)
## 
##        adolescent             Adult Developing_Embryo      Early_Embryo 
##                 6                 7                 4                 3 
##           elderly             Fetus       Late_Embryo   Midstage_Embryo 
##                 2                 5                 5                 4 
##           neonate           toddler  VeryEarly_Embryo   VeryLate_Embryo 
##                 3                 5                 5                 6
fbrain.psi <- all_psi[,fore.samps]
fbrain.psi.complete <- fbrain.psi[complete.cases(fbrain.psi),]
fbrain.psi.complete <- fbrain.psi.complete[,order(colnames(fbrain.psi.complete))]
fbrain.psi.100 <- fbrain.psi.complete * 100 # turn psi values into integers
fbrain.psi.100 <- round(fbrain.psi.100,0)
design <- model.matrix(~fbrain$Group)
y = DGEList(counts=fbrain.psi.100, group = fbrain$Group)
y <- estimateDisp(y,design)
fit <- glmQLFit(y, design)
fit <- glmQLFTest(fit, coef=2:6)
tab <- as.data.frame(topTags(fit, n=450))

tab$Gene <- sapply(rownames(tab),function(name){translate$GeneSymbol[which(translate$ID==name)]})

# write.table(tab, file = "/Users/alisha/Desktop/MISO_proj/EuropeanDataset/human_Forebrain_DEisoforms.tsv",sep="\t",col.names = NA)
kable(head(tab)) %>% kable_styling()
logFC.fbrain.GroupAdult logFC.fbrain.GroupDeveloping_Embryo logFC.fbrain.GroupEarly_Embryo logFC.fbrain.Groupelderly logFC.fbrain.GroupFetus logCPM F PValue FDR Gene
chr1:87133614:87133718:+@chr1:87165389:87165981:+@chr1:87168136:87169198:+ -1.3781899 2.948105 2.877820 -1.1045623 2.8967229 7.518165 34.84402 0 0 AC093155.3
chr5:88268899:88269027:+@chr5:88269468|88270526:88270585:+ -0.1205005 2.744842 2.477933 -0.3098665 1.7539938 7.842851 31.65448 0 0 TMEM161B-AS1
chr2:5968586:5969238:+@chr2:5969330:5969486:+@chr2:5982773:5982857:+@chr2:6000983:6003510:+ -0.1570974 -1.721987 -2.854720 -0.9319539 -0.4313871 8.628018 23.45436 0 0 SILC1
chr6:22056546:22056690:+@chr6:22110747:22110920:+@chr6:22191567:22191656:+@chr6:22194045:22194224:+ -0.6514583 2.065047 2.256419 0.7559075 2.4552011 7.589467 22.30854 0 0 CASC15
chr14:100834632-100834751:+@chr14:100835389-100835549:+ 0.2538369 1.778325 1.825676 0.4384934 1.4086168 8.838675 21.97420 0 0 MEG3
chr17:16439037:16439060:+@chr17:16439327:16439393:+@chr17:16439660:16439703:+@chr17:16440185:16440253:+ -0.6657910 -2.745684 -2.848308 -2.1475977 -3.5102037 5.590671 21.46583 0 0 SNHG29
top5 <- c("chr1:87133614:87133718:+@chr1:87165389:87165981:+@chr1:87168136:87169198:+",
          "chr5:88268899:88269027:+@chr5:88269468|88270526:88270585:+",
          "chr2:5968586:5969238:+@chr2:5969330:5969486:+@chr2:5982773:5982857:+@chr2:6000983:6003510:+",
          "chr6:22056546:22056690:+@chr6:22110747:22110920:+@chr6:22191567:22191656:+@chr6:22194045:22194224:+",
          "chr14:100834632-100834751:+@chr14:100835370-100835549:+")
    
top5 <- as.data.frame(fbrain.psi.complete[top5,])
top5 <- merge(top5,translate,by.x=0,by.y=3, all.x = T, all.y=F)

table(fbrain$Group)
## 
##        adolescent             Adult Developing_Embryo      Early_Embryo 
##                 6                 7                 4                 3 
##           elderly             Fetus       Late_Embryo   Midstage_Embryo 
##                 2                 5                 5                 4 
##           neonate           toddler  VeryEarly_Embryo   VeryLate_Embryo 
##                 3                 5                 5                 6
fbrain$Group <- factor(fbrain$Group, levels=c("VeryEarly_Embryo",
                                              "Early_Embryo",
                                              "Developing_Embryo",
                                              "Midstage_Embryo",
                                              "Late_Embryo",
                                              "VeryLate_Embryo",
                                              "Fetus",
                                              "neonate",
                                              # "infant",
                                              "toddler",
                                              # "school age child",
                                              "adolescent",
                                              # "young adult",
                                              # "middle adult",
                                              "Adult",
                                              "elderly"))

top5.melt <- reshape2::melt(top5)
top5.melt <- merge(top5.melt,fbrain,by.x=4,by.y = 1)
ggplot(top5.melt,aes(x=Group, y=value,fill=Group)) + geom_boxplot() + ylab("PSI") + 
    facet_wrap(~Row.names) + theme_bw() + 
    theme(axis.text.x = element_text(color = "black",size = 14, angle = 90, hjust = 1,vjust = 0.5), 
          strip.text.x = element_text(color="black",face = "bold",size=7))

fbrain <- fbrain[order(fbrain$Group),]

ra <- HeatmapAnnotation(Group = fbrain$Group,col = list(Group=c("VeryEarly_Embryo"= "purple",
                                                               "Adult" = "coral3", 
                                                               "Midstage_Embryo"="blue",
                                                               "Early_Embryo" = "yellow",
                                                               "Late_Embryo" = "brown", 
                                                               "VeryLate_Embryo"="pink",
                                                               "adolescent"= "cyan3",
                                                               "Developing_Embryo"="deeppink1",
                                                               "Fetus" = "goldenrod",
                                                               "neonate" = "skyblue",
                                                               # "infant" = "green",
                                                               "toddler" = "midnightblue",
                                                               "elderly" = "magenta")))

#ha <- HeatmapAnnotation(SpliceType = splice_type, col=list(SpliceType=c("SE"="red","MXE"="blue","RI"="cyan3","A3SS"="yellow","A5SS"="orange")))

lt05 <- rownames(tab)[which(tab$FDR < 0.05)]


toplot <- fbrain.psi.complete[lt05,]
toplot <- medianCtr(toplot) #log2(toplot+1))

toplot <- toplot[,fbrain$Samples]


draw(Heatmap(as.matrix(toplot), cluster_columns = F, name = "PSI",column_title = "Top DE Transcripts (FDR < 0.05)\nhuman Forebrain Samples", clustering_distance_columns = "pearson",clustering_method_columns = "average", bottom_annotation = ra, row_names_max_width = max_text_width(rownames(toplot), gp = gpar(fontsize = 3))),heatmap_legend_side = "left", annotation_legend_side = "left")

tog <- t(fbrain.psi.complete)
tog <- tog[order(rownames(tog)),]

fbrain <- fbrain[order(fbrain$Samples),]
identical(fbrain$Samples,rownames(tog))
## [1] TRUE
anova.res <- as.data.frame(apply(tog,2,function(col){summary(aov(col~fbrain$Group))[[1]][["Pr(>F)"]][1]}))
colnames(anova.res) <- "Pvalue"
anova.res$FDR <- p.adjust(anova.res$Pvalue,method = "fdr")
# head(anova.res)
anova.sig <- anova.res[which(anova.res$FDR<0.05),]
cat("Number of significant transcripts by anova test: ",nrow(anova.sig))
## Number of significant transcripts by anova test:  1000
tab.sig <- tab[which(tab$FDR<0.05),]
cat("\nNumber of significant transcripts by edgeR quasi-likelihood test: ",nrow(tab.sig))
## 
## Number of significant transcripts by edgeR quasi-likelihood test:  450
sig.trans <- rownames(anova.sig)[rownames(anova.sig) %in% rownames(tab.sig)]
cat("\nNumber of transcripts shared between anova and edgeR: ",length(sig.trans))
## 
## Number of transcripts shared between anova and edgeR:  403
grid.newpage()
venn <- draw.pairwise.venn(area1= nrow(anova.sig),
                           area2= nrow(tab.sig),
                           cross.area = length(sig.trans),
                           category   = c("Anova", "EdgeR"))
grid.draw(venn)

Hindbrain

# splicing <- c("SE","MXE","RI","A3SS","A5SS")

hind.samps <- human$Source.Name[which(human$Characteristics.organism.part.=="hindbrain")]
hbrain <- data.frame(Samples = human$Source.Name[which(human$Characteristics.organism.part.=="hindbrain")], DevelopStage = human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="hindbrain")],Group <- human$Characteristics.developmental.stage.[which(human$Characteristics.organism.part.=="hindbrain")])

# hbrain$Group[which(hbrain$DevelopStage == "19 week post conception")] <- "Fetus"
hbrain$Group[which(hbrain$DevelopStage == "13 week post conception" | hbrain$DevelopStage == "16 week post conception" | hbrain$DevelopStage == "18 week post conception")] <- "VeryLate_Embryo"
hbrain$Group[which(hbrain$DevelopStage == "11 week post conception" | hbrain$DevelopStage == "12 week post conception")] <- "Late_Embryo"
hbrain$Group[which(hbrain$DevelopStage == "9 week post conception" | hbrain$DevelopStage == "10 week post conception")] <- "Midstage_Embryo"
hbrain$Group[which(hbrain$DevelopStage == "8 week post conception")] <- "Developing_Embryo"
hbrain$Group[which(hbrain$DevelopStage == "7 week post conception")] <- "Early_Embryo"
hbrain$Group[which(hbrain$DevelopStage == "4 week post conception" | hbrain$DevelopStage == "5 week post conception" | hbrain$DevelopStage == "6 week post conception")] <- "VeryEarly_Embryo"

# hbrain$Group[which(hbrain$DevelopStage == "middle adult" | hbrain$DevelopStage == "young adult")] <- "Adult"

# hbrain <- hbrain[-which(hbrain$DevelopStage == "adolescent"),]

# table(hbrain$DevelopStage)
table(hbrain$Group)
## 
##        adolescent Developing_Embryo      Early_Embryo           elderly 
##                 4                 4                 3                 2 
##            infant       Late_Embryo      middle adult   Midstage_Embryo 
##                 3                 7                 2                 4 
##           neonate  school age child           toddler  VeryEarly_Embryo 
##                 5                 3                 2                 9 
##   VeryLate_Embryo       young adult 
##                 6                 5
# colnames(all_psi) <- sub("^X","",colnames(all_psi))
hbrain.psi <- all_psi[,hind.samps]
hbrain.psi.complete <- hbrain.psi[complete.cases(hbrain.psi),]
hbrain.psi.complete <- hbrain.psi.complete[,order(colnames(hbrain.psi.complete))]
hbrain.psi.100 <- hbrain.psi.complete * 100 # turn psi values into integers
hbrain.psi.100 <- round(hbrain.psi.100,0)
design <- model.matrix(~hbrain$Group)
y = DGEList(counts=hbrain.psi.100, group = hbrain$Group)
y <- estimateDisp(y,design)
fit <- glmQLFit(y, design)
fit <- glmQLFTest(fit, coef=2:6)
tab <- as.data.frame(topTags(fit, n=500))

tab$Gene <- sapply(rownames(tab),function(name){translate$GeneSymbol[which(translate$ID==name)]})

# write.table(tab, file = "/Users/alisha/Desktop/MISO_proj/EuropeanDataset/human_Hindbrain_DEisoforms.tsv",sep="\t",col.names = NA)
kable(head(tab)) %>% kable_styling()
logFC.hbrain.GroupDeveloping_Embryo logFC.hbrain.GroupEarly_Embryo logFC.hbrain.Groupelderly logFC.hbrain.Groupinfant logFC.hbrain.GroupLate_Embryo logCPM F PValue FDR Gene
chr19:27793000:27793232:@chr19:27778711:27779043:@chr19:27773766:27774252:- -1.485768 -2.661664 0.9872802 0.9549143 -1.9088499 7.017195 17.99017 0 3e-07 LINC00662
chr2:5968586:5969238:+@chr2:5982298|5982773:5982857:+ 2.559598 2.643883 0.2950541 1.4673478 2.2797178 8.318076 17.92197 0 3e-07 SILC1
chr6:30296132:30296237:@chr6:30289494|30292599:30286703:- 1.162813 1.072769 0.3665222 0.3020122 0.9891289 8.680593 17.88935 0 3e-07 HCG18%2CHCG17
chr11:122292284:122292403:@chr11:122180336:122180398:@chr11:122155551:122155632:@chr11:122089196:122091719:- -2.389528 -2.408056 0.4106787 0.5236948 -2.3381540 6.969478 17.51449 0 3e-07 MIR100HG
chr19:27793000:27793232:@chr19:27778711:27779043:@chr19:27774161:27774792:- -1.385682 -2.402827 0.8870307 0.6670415 -1.9397886 7.141349 17.43440 0 3e-07 LINC00662
chr14:100834632-100834751:+@chr14:100835293-100835549:+ 1.113765 1.270986 0.0202678 -0.2823901 1.1398182 8.591863 17.05770 0 3e-07 MEG3
top5 <- c("chr19:27793000:27793232:-@chr19:27778711:27779043:-@chr19:27773766:27774252:-",
          "chr2:5968586:5969238:+@chr2:5982298|5982773:5982857:+",
          "chr6:30296132:30296237:-@chr6:30289494|30292599:30286703:-",
          "chr11:122292284:122292403:-@chr11:122180336:122180398:-@chr11:122155551:122155632:-@chr11:122089196:122091719:-",
          "chr19:27793000:27793232:-@chr19:27778711:27779043:-@chr19:27774161:27774792:-")
    
top5 <- as.data.frame(hbrain.psi.complete[top5,])
top5 <- merge(top5,translate,by.x=0,by.y=3, all.x = T, all.y=F)
hbrain$Group <- factor(hbrain$Group, levels=c("VeryEarly_Embryo",
                                              "Early_Embryo",
                                              "Developing_Embryo",
                                              "Midstage_Embryo",
                                              "Late_Embryo",
                                              "VeryLate_Embryo",
                                              # "Fetus",
                                              "neonate",
                                              "infant",
                                              "toddler",
                                              "school age child",
                                              "adolescent",
                                              "young adult",
                                              "middle adult",
                                              # "Adult",
                                              "elderly"))

top5.melt <- reshape2::melt(top5)
top5.melt <- merge(top5.melt,hbrain,by.x=4,by.y = 1)

Top 5 Most Significantly DE Isoforms

ggplot(top5.melt,aes(x=Group, y=value,fill=Group)) + geom_boxplot() + ylab("PSI") + 
    facet_wrap(~Row.names) + theme_bw() + 
    theme(axis.text.x = element_text(color = "black",size = 14, angle = 90, hjust = 1,vjust = 0.5), 
          strip.text.x = element_text(color="black",face = "bold",size=7))

new <- hbrain.psi.complete[rownames(tab)[which(tab$FDR<0.05)],]

rownames(new) <- sapply(rownames(new),function(name){paste(translate$GeneSymbol[which(translate$ID==name)],name,sep="_")})


new$transcripts <- rownames(new)
new.melt <- melt(new)
new.melt <- merge(new.melt,hbrain,by.x=2,by.y=1)

# head(new.melt)
hbrain <- hbrain[order(hbrain$Group),]

ra <- HeatmapAnnotation(Group = hbrain$Group,col = list(Group=c("VeryEarly_Embryo"= "purple",
                                                               "middle adult" = "coral3", 
                                                               "Midstage_Embryo"="blue",
                                                               "Early_Embryo" = "yellow",
                                                               "Late_Embryo" = "brown", 
                                                               "VeryLate_Embryo"="pink",
                                                               "adolescent"= "cyan3",
                                                               "Developing_Embryo"="deeppink1",
                                                               "Fetus" = "goldenrod",
                                                               "neonate" = "skyblue",
                                                               "infant" = "green",
                                                               "school age child" = "grey60",
                                                               "toddler" = "midnightblue",
                                                               "young adult" = "black",
                                                               "elderly" = "magenta")))

#ha <- HeatmapAnnotation(SpliceType = splice_type, col=list(SpliceType=c("SE"="red","MXE"="blue","RI"="cyan3","A3SS"="yellow","A5SS"="orange")))

lt05 <- rownames(tab)[which(tab$FDR < 0.05)]

toplot <- hbrain.psi.complete[lt05,]
toplot <- medianCtr(toplot) #log2(toplot+1))


draw(Heatmap(as.matrix(toplot), cluster_columns = F, name = "PSI",column_title = "Top DE Transcripts (FDR < 0.05)\nhuman Hindbrain Samples", clustering_distance_columns = "pearson",clustering_method_columns = "average", bottom_annotation = ra, row_names_max_width = max_text_width(rownames(toplot), gp = gpar(fontsize = 3))),heatmap_legend_side = "left", annotation_legend_side = "left")

tog <- t(hbrain.psi.complete)
tog <- tog[order(rownames(tog)),]

hbrain <- hbrain[order(hbrain$Samples),]
# identical(hbrain$Samples,rownames(tog))



anova.res <- as.data.frame(apply(tog,2,function(col){summary(aov(col~hbrain$Group))[[1]][["Pr(>F)"]][1]}))
colnames(anova.res) <- "Pvalue"
anova.res$FDR <- p.adjust(anova.res$Pvalue,method = "fdr")
# head(anova.res)
anova.sig <- anova.res[which(anova.res$FDR<0.05),]
cat("Number of significant transcripts by anova test: ",nrow(anova.sig))
## Number of significant transcripts by anova test:  851
tab.sig <- tab[which(tab$FDR<0.05),]
cat("\nNumber of significant transcripts by edgeR quasi-likelihood test: ",nrow(tab.sig))
## 
## Number of significant transcripts by edgeR quasi-likelihood test:  354
sig.trans <- rownames(anova.sig)[rownames(anova.sig) %in% rownames(tab.sig)]
cat("\nNumber of transcripts shared between anova and edgeR: ",length(sig.trans))
## 
## Number of transcripts shared between anova and edgeR:  315
grid.newpage()
venn <- draw.pairwise.venn(area1= nrow(anova.sig),
                           area2= nrow(tab.sig),
                           cross.area = length(sig.trans),
                           category   = c("Anova", "EdgeR"))
grid.draw(venn)